##################################################################
##################################################################
## Replication Material
## Stefan Müller: The Temporal Focus of Campaign Communication
## The Journal of Politics
## stefan.mueller@ucd.ie
##
## Script 7: Results reported in SI Section E
##################################################################
##################################################################


# Note: The file description_replication_material_jop_mueller.pdf describes the purpose of this 
# file in detail and lists the names and sources of all datasets 
# used in this script


# This script was run on the following R version, platform and OS:
# R version 3.6.0 (2019-04-26)
# Platform: Platform: x86_64-apple-darwin15.6.0 (64-bit)
# Running under: macOS Catalima 10.15.5


# load packages required to run this script
library(ggplot2)   # CRAN v3.3.2
library(scales)    # CRAN v1.1.1
library(GGally)    # CRAN v1.4.0
library(texreg)    # CRAN v1.36.23
library(lme4)      # CRAN v1.1-21
library(estimatr)  # CRAN v0.22.0
library(broom)     # CRAN v0.5.5
library(cowplot)   # CRAN v1.0.0
library(tidyr)     # CRAN v1.1.0
library(car)       # CRAN v3.0-7
library(ggeffects) # CRAN v0.14.2
library(emmeans)   # CRAN v1.4.5
library(Zelig)     # CRAN v5.1.6.1
library(stringr)   # CRAN v1.4.0
library(dplyr)     # CRAN v1.0.0

# create custom ggplot2 scheme
theme_baser <- function (){
  theme_minimal()  %+replace%
    theme(panel.grid.minor.x = element_blank(),
          panel.grid.minor.y = element_blank(),
          panel.grid.major.x = element_blank(),
          panel.grid.major.y = element_blank(),
          panel.border = element_rect(fill = NA,color = "black", size = 0.5,
                                      linetype = "solid"),
          legend.title = element_text(size = 15),
          plot.title = element_text(size = 15, face = "italic",
                                    vjust = 1.5, hjust = 0,
                                    margin=margin(0, 0, 12 ,0)),
          legend.position = "bottom",
          axis.ticks = element_line(size = 0.3),
          axis.ticks.length = unit(0.2, "cm"),
          legend.text=element_text(size = 13),
          strip.text = element_text(size = 15, hjust = 0.5,
                                    margin = margin(b = 5, r = 5, l = 5, t = 5)),
          axis.text = element_text(colour = "black", size = 13),
          axis.title = element_text(size = 13, hjust = 0.5))
}

# set theme
theme_set(theme_baser())

dat_combined_all <- readRDS("data_manifestos_classified.rds")

# remove all sentences with more than 99 tokens
dat_combined <- filter(dat_combined_all, ntoken < 100)
dat_combined$year <- as.numeric(dat_combined$year)


# formulas for sentiment aggregation
sent_log <- function(positive, negative, neg_negative, neg_positive, 
                     negations = FALSE) {
    if (negations == FALSE) {
        log((sum(positive) + 0.5) / (sum(negative) + 0.5))
    }
    
    else {
        log((sum(positive) + sum(neg_negative, na.rm = TRUE) + 0.5) / (sum(negative) + sum(neg_positive, na.rm = TRUE) + 0.5))
        
    }
}


sent_crabtreeetal <- function(positive, negative, neg_negative, neg_positive,
                              ntoken = ntoken, negations = FALSE) {
    
    if (negations == FALSE) {
        100 * (sum(positive) - sum(negative)) / sum(ntoken)
    }
    
    else {
        100 * (sum(positive) + sum(neg_negative, na.rm = TRUE) - sum(negative) - sum(neg_positive, na.rm = TRUE)) / sum(ntoken)
    }
    
}


dat_combined_class3 <- dat_combined %>% 
    group_by(manifesto_id, class) %>% 
    mutate(n_sentences_class = n(),
           n_words_class = sum(ntoken, na.rm = TRUE)) %>% 
    group_by(manifesto_id) %>% 
    mutate(n_sentences_manifesto =  n(),
           n_words_manifesto = sum(ntoken, na.rm = TRUE)) %>% 
    ungroup() %>% 
    group_by(manifesto_id, class) %>% 
    mutate(sent_liwc_log = sent_log(positive_liwc, negative_liwc)) %>% 
    mutate(sent_lsd_log = sent_log(positive_lsd, negative_lsd, 
                                   neg_negative = neg_negative_lsd,
                                   neg_positive = neg_positive_lsd,
                                   negations = TRUE)) %>% 
    mutate(sent_rauh_log = sent_log(positive_rauh, negative_rauh,
                                    neg_negative = neg_negative_rauh,
                                    neg_positive = neg_positive_rauh)) %>% 
    mutate(sent_liwc15_log = sent_log(positive_liwc15, negative_liwc15)) %>% 
    mutate(sent_liwc_crabtreeetal = sent_crabtreeetal(positive_liwc, negative_liwc,
                                                      ntoken = ntoken)) %>% 
    mutate(sent_liwc15_crabtreeetal = sent_crabtreeetal(positive_liwc15, negative_liwc15,
                                                        ntoken = ntoken)) %>% 
    mutate(sent_lsd_crabtreeetal = sent_crabtreeetal(positive_lsd, negative_lsd,
                                                     neg_negative = neg_negative_lsd,
                                                     neg_positive = neg_positive_lsd,
                                                     ntoken = ntoken)) %>% 
    mutate(sent_rauh_crabtreeetal = sent_crabtreeetal(positive_rauh, negative_rauh,
                                                      neg_negative = neg_negative_rauh,
                                                      neg_positive = neg_positive_rauh,
                                                      ntoken = ntoken))

# aggregate manifestos to 3 observations (one per class), 
# select relevant variables, and keep only one observation per manifesto-class
dat_aggregated_class3 <- dat_combined_class3 %>%
    ungroup() %>% 
    select(countryname, year, language,
           party, partyname, 
           party_name, 
           extremist_party,
           class,
           edate, starts_with("n_"),
           manifesto_id,
           election_date, election_id,
           seat_share_cmp_lag,
           starts_with("incumbency_"),
           party_family,
           rile,
           starts_with("sent_"),
           starts_with("gdp"),
           starts_with("inflation"),
           starts_with("unemployment")) %>% 
    unique()


# create factors for country, extremist party, and class
dat_aggregated_class3$countryname <- factor(dat_aggregated_class3$countryname)

dat_aggregated_class3$extremist_party <- factor(dat_aggregated_class3$extremist_party,
                                                levels = c("Not extremist party", "Extremist party"))

dat_aggregated_class3$class <- factor(dat_aggregated_class3$class,
                                      levels = c("Future", "Past", "Present"))


# run regression models predicting sentiment
lm_sent_liwc_basic <- lm_robust(sent_liwc_crabtreeetal ~ 
                                    incumbency_status2_factor * class + countryname,
                                data = dat_aggregated_class3,
                                cluster = manifesto_id)

lm_sent_liwc_basic_with_gdp <- lm_robust(sent_liwc_crabtreeetal ~ 
                                             incumbency_status2_factor * class + countryname,
                                         data = filter(dat_aggregated_class3, !is.na(gdp_growth_lag)),
                                         cluster = manifesto_id)



# regression models controlling for different economic indicators

lm_sent_liwc_extended_gdp <- lm_robust(sent_liwc_crabtreeetal ~ 
                                           incumbency_status2_factor * class + countryname +
                                           poly(rile, 2) + extremist_party + year + gdp_growth_lag,
                                       data = dat_aggregated_class3,
                                       cluster = manifesto_id)

lm_sent_liwc_extended_unemp <- lm_robust(sent_liwc_crabtreeetal ~ 
                                             incumbency_status2_factor * class + countryname + 
                                             poly(rile, 2) + extremist_party + year + unemployment_lag,
                                         data = dat_aggregated_class3,
                                         cluster = manifesto_id)

lm_sent_liwc_extended_infl <- lm_robust(sent_liwc_crabtreeetal ~ 
                                            incumbency_status2_factor * class + countryname +
                                            poly(rile, 2) + extremist_party + year + inflation_lag,
                                        data = dat_aggregated_class3,
                                        cluster = manifesto_id)



lm_sent_liwc_extended_gdp_rile_int <- lmer(sent_liwc_crabtreeetal ~ 
                                               incumbency_status2_factor * class * poly(rile, 2) 
                                           + countryname + extremist_party + 
                                               year + gdp_growth_lag + (1 | manifesto_id),
                                           data = dat_aggregated_class3)


# Table A12 ----
screenreg(list(lm_sent_liwc_basic,
               lm_sent_liwc_basic_with_gdp,
               lm_sent_liwc_extended_gdp,
               lm_sent_liwc_extended_unemp,
               lm_sent_liwc_extended_infl))



texreg(list(lm_sent_liwc_basic,
            lm_sent_liwc_basic_with_gdp,
            lm_sent_liwc_extended_gdp,
            lm_sent_liwc_extended_unemp,
            lm_sent_liwc_extended_infl),
       omit.coef = c("country*"),
       custom.coef.names = c("(Intercept)",
                             "Incumbent",
                             "Class: Past (ref.: Future)",
                             "Class: Present",
                             "Incumbent $\\times$ Past",
                             "Incumbent $\\times$ Present",
                             "RILE", 
                             "RILE$^2$",
                             "Extremist party",
                             "Year",
                             "GDP growth",
                             "Unemployment",
                             "Inflation"
       ),
       float.pos = "!h",
       label = "tab:reg_sent_liwc_main",
       fontsize = "footnotesize",
       caption.above =  TRUE,
       include.variance=FALSE,
       longtable=FALSE,
       use.packages=FALSE,
       custom.gof.names = c("R$^2$", "Adj. R$^2$",
                            "Number of observations",
                            "RMSE", "Number of clusters"),
       file="taba12.tex",
       caption = "Sentiment in party manifestos (using the LIWC sentiment dictionary and the aggregation formula recommended by \\textcite{crabtree19}  as dependent variable)",
       custom.note = ("\\parbox{0.7\\linewidth}{\\footnotesize \\vspace{2pt}%stars. \\\\
                      \\textit{Note}: 95\\% confidence intervals in parentheses. Model 2 reproduces Model 1 but only keeps observations with 
                      available information on GDP growth. All models are
                      linear regressions with robust standard errors clustered by manifesto. 
                      The models include country dummies which are omitted from the table.}"),)



lm_sent_liwc_extended_gdp_inc2 <- lm_robust(sent_liwc_crabtreeetal ~ 
                                                countryname +
                                                poly(rile, 2) + extremist_party + year + gdp_growth_lag +
                                                class * incumbency_status2_factor,
                                            data = dat_aggregated_class3,
                                            cluster = manifesto_id)



dat_aggregated_class3 <- dat_aggregated_class3 %>% 
    mutate(incumbency_status4_factor = ifelse(is.na(seat_share_cmp_lag), 
                                              "Not in Previous Parliament",
                                              as.character(incumbency_status3_factor)))

table(dat_aggregated_class3$incumbency_status4_factor)

dat_aggregated_class3$incumbency_status4_factor <- factor(dat_aggregated_class3$incumbency_status4_factor,
                                                          levels = c("Not in Previous Parliament",
                                                                     "Opposition", 
                                                                     "Non-PM Incumbent",
                                                                     "PM Incumbent"))


lm_sent_liwc_extended_gdp_inc4 <- lm_robust(sent_liwc_crabtreeetal ~ 
                                                incumbency_status4_factor * class + countryname +
                                                poly(rile, 2) + extremist_party + year + gdp_growth_lag,
                                            data = dat_aggregated_class3,
                                            cluster = manifesto_id)


# Table A13 ----
screenreg(list(lm_sent_liwc_extended_gdp_inc2,
               lm_sent_liwc_extended_gdp_inc4))


texreg(list(lm_sent_liwc_extended_gdp_inc2,
            lm_sent_liwc_extended_gdp_inc4),
       omit.coef = c("country*"),
       custom.coef.names = c("(Intercept)",
                             "RILE", 
                             "RILE$^2$",
                             "Extremist party",
                             "Year",
                             "GDP growth",
                             "Class: Past (ref.: Future)",
                             "Class: Present",
                             "Inc. (2 cat.): Incumbent",
                             "Inc. (2 cat.): Incumbent $\\times$ Past",
                             "Inc. (2 cat.): Incumbent $\\times$ Present",
                             "Inc. (4 cat.): Opposition",
                             "Inc. (4 cat.): Non-PM Incumbent",
                             "Inc. (4 cat.): PM Incumbent",
                             "Inc. (4 cat.): Opposition $\\times$ Past",
                             "Inc. (4 cat.): Non-PM Incumbent $\\times$ Past",
                             "Inc. (4 cat.): PM Incumbent $\\times$ Past",
                             "Inc. (4 cat.): Opposition $\\times$ Present",
                             "Inc. (4 cat.): Non-PM Incumbent $\\times$ Present",
                             "Inc. (4 cat.): PM Incumbent $\\times$ Present"
       ),
       float.pos = "!h",
       label = "tab:reg_sent_liwc_more_classes",
       fontsize = "footnotesize",
       caption.above =  TRUE,
       include.variance=FALSE,
       longtable=FALSE,
       use.packages=FALSE,
       custom.gof.names = c("R$^2$", "Adj. R$^2$",
                            "Number of observations",
                            "RMSE", "Number of clusters (Manifestos)"),
       file="taba13.tex",
       caption = "Predicting sentiment in party manifestos (using the LIWC sentiment dictionary and the aggregation formula recommended by \\textcite{crabtree19}  as dependent variable)",
       custom.note = ("\\parbox{0.98\\linewidth}{\\footnotesize \\vspace{2pt}%stars. \\\\
                      \\textit{Note}: 95\\% confidence intervals in parentheses. All models are
                      linear regressions with robust standard errors clustered by manifesto. 
                      Model 1 reports the regression used to estimate fitted/expected values reported in the paper. 
                      Model 2 reruns the model with a more detailed classification of incumbency status. 
                      The models include country dummies which are omitted from the table.}"),)


pred_sent_inc4 <- ggpredict(lm_sent_liwc_extended_gdp_inc4, 
                            terms = c("class", 
                                      "incumbency_status4_factor"))

pred_sent_inc4 <- pred_sent_inc4 %>% 
    mutate(tense = car::recode(x, "'1'='Past';'2'='Present';'3'='Future'"))

pred_sent_inc4$tense <- factor(pred_sent_inc4$tense, 
                               levels = c("Past", "Present", "Future"))

pred_sent_inc4$group <- factor(pred_sent_inc4$group,
                               levels = c("Not in Previous Parliament",
                                          "Opposition", 
                                          "Non-PM Incumbent",
                                          "PM Incumbent"))

# Figure A18 ----
ggplot(pred_sent_inc4, aes(x = tense, 
                           y = predicted,
                           ymin = conf.low,
                           ymax = conf.high, 
                           colour = group,
                           shape = group)) +
    geom_point(position = position_dodge(width = 0.35), size = 3.5) +
    geom_linerange(position = position_dodge(width = 0.35), size = 0.6) +
    geom_linerange(aes(ymin = predicted - 1.645 * std.error,
                       ymax = predicted + 1.645 * std.error), 
                   position = position_dodge(width = 0.35), 
                   size = 1.3) +
    scale_shape_manual(values = c(2, 17, 1, 16)) +
    scale_y_continuous(limits = c(-0.5, 4), breaks = c(seq(0, 4, 1))) +
    scale_colour_manual(values = c("red", "red", "black", "black")) +
    labs(x = NULL, y = "Sentiment") +
    theme(legend.title = element_blank(),
          legend.position = c(0.7, 0.2),
          legend.background = element_rect(colour = 'grey60', 
                                           fill = 'white', linetype='solid'),
          legend.text = element_text(size = 10))
ggsave("fga18.pdf",
       width = 5, height = 4)


# aggregate sentiment to manifesto level 
# to estimate correlations between dictionaries and formulas
dat_sent_manifestolevel <- dat_combined %>% 
    group_by(manifesto_id) %>% 
    mutate(sent_liwc_log = sent_log(positive_liwc, negative_liwc)) %>% 
    mutate(sent_lsd_log = sent_log(positive_lsd, negative_lsd, 
                                   neg_negative = neg_negative_lsd,
                                   neg_positive = neg_positive_lsd,
                                   negations = TRUE)) %>% 
    mutate(sent_rauh_log = sent_log(positive_rauh, negative_rauh,
                                    neg_negative = neg_negative_rauh,
                                    neg_positive = neg_positive_rauh)) %>% 
    mutate(sent_liwc15_log = sent_log(positive_liwc15, negative_liwc15)) %>% 
    mutate(sent_liwc_crabtreeetal = sent_crabtreeetal(positive_liwc, negative_liwc,
                                                      ntoken = ntoken)) %>% 
    mutate(sent_liwc15_crabtreeetal = sent_crabtreeetal(positive_liwc15, negative_liwc15,
                                                        ntoken = ntoken)) %>% 
    mutate(sent_lsd_crabtreeetal = sent_crabtreeetal(positive_lsd, negative_lsd,
                                                     neg_negative = neg_negative_lsd,
                                                     neg_positive = neg_positive_lsd,
                                                     ntoken = ntoken)) %>% 
    mutate(sent_rauh_crabtreeetal = sent_crabtreeetal(positive_rauh, negative_rauh,
                                                      neg_negative = neg_negative_rauh,
                                                      neg_positive = neg_positive_rauh,
                                                      ntoken = ntoken)) %>% 
    select(manifesto_id, countryname, language, starts_with("sent_")) %>% 
    unique()


# Figure A19 ----
dat_sentiment_en <- dat_sent_manifestolevel %>% 
    ungroup() %>%
    filter(language == "english") %>% 
    select(LIWC = sent_liwc_crabtreeetal, 
           `LIWC (log)` = sent_liwc_log,
           `LIWC 2015` = sent_liwc15_crabtreeetal,
           `LIWC 2015 (log)` = sent_liwc15_log,
           LSD = sent_lsd_crabtreeetal,
           `LSD (log)` = sent_lsd_log)

p_en <- ggpairs(dat_sentiment_en, mapping = aes(alpha = 0.01)) +
    theme(strip.text = element_text(size = 10),
          axis.text = element_text(size = 9))

pdf("fga19.pdf", width = 8, height = 8)
p_en
dev.off()


# Figure A20 ----
dat_sentiment_de <- dat_sent_manifestolevel %>% 
    ungroup() %>%
    filter(language == "german") %>% 
    select(LIWC = sent_liwc_crabtreeetal, 
           `LIWC (log)` = sent_liwc_log,
           LSD = sent_lsd_crabtreeetal,
           `LSD (log)` = sent_lsd_log,
           Rauh = sent_rauh_crabtreeetal,
           `Rauh (log)` = sent_rauh_log)

p_de <- ggpairs(dat_sentiment_de, mapping = aes(alpha = 0.01)) +
    theme(strip.text = element_text(size = 10),
          axis.text = element_text(size = 9))

pdf("fga20.pdf", width = 8, height = 8)
p_de
dev.off()



# update regressions with different dictionaries and aggregation methods

lm_liwc_prop <- lm_robust(sent_liwc_crabtreeetal ~ 
                              incumbency_status2_factor * class + countryname +
                              poly(rile, 2) + extremist_party + year + gdp_growth_lag,
                          data = dat_aggregated_class3,
                          cluster = manifesto_id)


lm_liwc_log <- update(lm_sent_liwc_extended_gdp, sent_liwc_log ~ .)
lm_lsd_log <- update(lm_sent_liwc_extended_gdp, sent_lsd_log ~ .)
lm_lsd_prop <- update(lm_sent_liwc_extended_gdp, sent_lsd_crabtreeetal ~ .)


# get expected/fitted values
pred_liwc_prop <- ggpredict(lm_liwc_prop, 
                            terms = c("class", 
                                      "incumbency_status2_factor")) %>% 
    mutate(model = "LIWC\n(Crabtree et al. 2020)")

pred_liwc_log <- ggpredict(lm_liwc_log, 
                           terms = c("class", 
                                     "incumbency_status2_factor")) %>% 
    mutate(model = "LIWC\n(Proksch et al. 2019)")

pred_lsd_prop <- ggpredict(lm_lsd_prop, 
                           terms = c("class", 
                                     "incumbency_status2_factor")) %>% 
    mutate(model = "Lexicoder Sentiment Dictionary\n(Crabtree et al. 2020)")

pred_lsd_log <- ggpredict(lm_lsd_log, 
                          terms = c("class", 
                                    "incumbency_status2_factor")) %>% 
    mutate(model = "Lexicoder Sentiment Dictionary\n(Proksch et al. 2019)")



pred_diff_dicts <- bind_rows(pred_lsd_log,
                             pred_lsd_prop,
                             pred_liwc_log,
                             pred_liwc_prop)


pred_diff_dicts <- pred_diff_dicts %>% 
    mutate(tense = car::recode(x, "'1'='Future';'2'='Past';'3'='Present'"))

pred_diff_dicts$tense <- factor(pred_diff_dicts$tense,
                                levels = c("Past", "Present", "Future"))

# Figure A21 ----
ggplot(pred_diff_dicts, aes(x = tense, 
                            y = predicted,
                            ymin = conf.low,
                            ymax = conf.high, 
                            colour = group,
                            shape = group)) +
    geom_point(position = position_dodge(width = 0.35), size = 3.5) +
    geom_linerange(position = position_dodge(width = 0.35), size = 0.6) +
    geom_linerange(aes(ymin = predicted - 1.645 * std.error,
                       ymax = predicted + 1.645 * std.error), 
                   position = position_dodge(width = 0.35), 
                   size = 1.3) +
    scale_shape_manual(values = c(17, 16)) +
    facet_wrap(~model, scales = "free") +
    scale_colour_manual(values = c("red", "black")) +
    labs(x = NULL, y = "Sentiment") +
    theme(legend.title = element_blank(),
          legend.position = "bottom")
ggsave("fga21.pdf",
       width = 10, height = 8)



# Table A14 ----
screenreg(list(lm_lsd_log,
               lm_lsd_prop,
               lm_liwc_log,
               lm_liwc_prop))


texreg(list(lm_lsd_log,
            lm_lsd_prop,
            lm_liwc_log,
            lm_liwc_prop),
       omit.coef = c("country*"),
       custom.model.names = c("M1 (LSD, log)",
                              "M2 (LSD)",
                              "M3 (LIWC, log)",
                              "M4 (LIWC)"),
       custom.coef.names = c("(Intercept)",
                             "Incumbent",
                             "Class: Past (ref.: Future)",
                             "Class: Present",
                             "RILE", 
                             "RILE$^2$",
                             "Extremist party",
                             "Year",
                             "GDP growth",
                             "Incumbent $\\times$ Past",
                             "Incumbent $\\times$ Present"
       ),
       float.pos = "!h",
       label = "tab:reg_sent_diff_dicts",
       fontsize = "footnotesize",
       caption.above =  TRUE,
       include.variance=FALSE,
       longtable=FALSE,
       use.packages=FALSE,
       custom.gof.names = c("R$^2$", "Adj. R$^2$",
                            "Number of observations",
                            "RMSE", "Number of clusters (Manifestos)"),
       file="taba14.tex",
       caption = "Sentiment in party manifestos (using different sentiment dictionaries and aggregation methods as dependent variables)",
       custom.note = ("\\parbox{0.9\\linewidth}{\\footnotesize \\vspace{2pt}%stars. \\\\
                      \\textit{Note}: 95\\% confidence intervals in parentheses.
                      All models are linear regressions with robust standard errors clustered by manifesto. 
                      The models include country dummies which are omitted from the table.}"),)



# run various additional models to test the robustness of results

lm_modelchoice_fe_basic <- lm_robust(sent_liwc_crabtreeetal ~ 
                                         incumbency_status2_factor * class + countryname,
                                     data = dat_aggregated_class3,
                                     cluster = manifesto_id)


lm_modelchoice_fe_basic <- lm_robust(sent_liwc_crabtreeetal ~ 
                                         incumbency_status2_factor * class + countryname,
                                     data = dat_aggregated_class3,
                                     cluster = manifesto_id)

lm_modelchoice_mixed <- lmer(sent_liwc_crabtreeetal ~ 
                                 incumbency_status2_factor * class + 
                                 (1 | countryname)  +  (1 | partyname) + (1 | election_id) +
                                 +  (1 | manifesto_id),
                             data = dat_aggregated_class3)


lm_modelchoice_fe_election_cluster_party <- lm_robust(sent_liwc_crabtreeetal ~ 
                                                          incumbency_status2_factor * class,
                                                      data = dat_aggregated_class3,
                                                      fixed_effects = election_id,
                                                      cluster = party)


## Table A15 ----
screenreg(list(lm_modelchoice_fe_basic, 
               lm_modelchoice_fe_election_cluster_party, 
               lm_modelchoice_mixed))

texreg(list(lm_modelchoice_fe_basic, 
            lm_modelchoice_fe_election_cluster_party, 
            lm_modelchoice_mixed),
       ci.force = TRUE, 
       omit.coef = c("(Intercept)"),
       float.pos = "!h",
       label = "tab:reg_sent_modelchoice",
       fontsize = "footnotesize",
       caption.above =  TRUE,
       longtable=FALSE,
       use.packages=FALSE,
       file="taba15.tex",
       caption = "Sentiment in party manifestos based on different model specifications (using the LIWC sentiment dictionary and the aggregation formula recommended by \\textcite{crabtree19}  as dependent variable)",
       custom.note = ("\\parbox{0.8\\linewidth}{\\footnotesize \\vspace{2pt}%stars. \\\\
                      \\textit{Note}: 95\\% confidence intervals in parentheses. 
                    Model 1 includes country dummies and robust clustered standard errors for each manifesto.
                    Model 2 includes election fixed effects and robust clustered standard errors for parties.
                    Model 3 is a multilevel regression with  country-, election-, party-, and manifesto-varying intercepts.}"),
       custom.coef.names = c("Incumbent", 
                             "Class: Past (ref.: Future)",
                             "Class: Present",
                             "Austria (ref.: Australia)",
                             "Canada", 
                             "Germany", 
                             "Ireland", 
                             "New Zealand",
                             "Switzerland", 
                             "United Kingdom",
                             "United States", 
                             "Incumbent $\\times$ Past",
                             "Incumbent $\\times$ Present"),
       custom.gof.names = c("R$^2$", "Adj. R$^2$",
                            "Num. obs.", "RMSE", 
                            "Number of clusters",
                            "AIC", "BIC", 
                            "Log Likelihood", 
                            "Num. groups: Manifesto",
                            "Num. groups: Election",
                            "Num. groups: Party",
                            "Num. groups: Country",
                            "Variance: Manifesto",
                            "Variance: Election",
                            "Variance: Party",
                            "Variance: Country",
                            "Variance: Residual"))


# jacknife-style regression (leaving one country out at a time)

pred_countries <- data.frame()
countrynames <- unique(dat_aggregated_class3$countryname)

for (i in countrynames) {
    
    lm_sent_liwc_basic_country <- lm_robust(sent_liwc_crabtreeetal ~ 
                                                incumbency_status2_factor * class + poly(rile, 2) + countryname,
                                            data = filter(dat_aggregated_class3,
                                                          countryname != i),
                                            cluster = manifesto_id)
    
    pred_liwc_country <- ggpredict(lm_sent_liwc_basic_country, 
                                   terms = c("class", 
                                             "incumbency_status2_factor")) %>% 
        mutate(model = paste(i, "excluded"))
    
    
    pred_countries <- bind_rows(pred_liwc_country, pred_countries)
}

pred_countries <- pred_countries %>% 
    mutate(tense = car::recode(x, "'1'='Future';'2'='Past';'3'='Present'"))

pred_countries$tense <- factor(pred_countries$tense, 
                               levels = c("Past", "Present", "Future"))

# Figure A22 ----
ggplot(pred_countries, aes(x = tense, 
                           y = predicted,
                           ymin = conf.low,
                           ymax = conf.high, 
                           colour = group,
                           shape = group)) +
    geom_point(position = position_dodge(width = 0.3), size = 3.5) +
    geom_linerange(position = position_dodge(width = 0.3), size = 0.6) +
    geom_linerange(aes(ymin = predicted - 1.645 * std.error,
                       ymax = predicted + 1.645 * std.error), 
                   position = position_dodge(width = 0.3), 
                   size = 1.3) +
    facet_wrap(~model) +
    scale_shape_manual(values = c(17, 16)) +
    scale_colour_manual(values = c("red", "black")) +
    labs(x = NULL, y = "Sentiment") +
    theme(legend.title = element_blank(),
          legend.position = "bottom")
ggsave("fga22.pdf",
       width = 10, height = 12)


# repeat jackknife approach for first differences

# empty dataframes which are used to store the results 
# from the first difference estimates
dat_fd_all_jackknife <- data.frame()
dat_fd_sum_jackknife <- data.frame()

countrynames <- unique(dat_aggregated_class3$countryname)

# square RILE variable
dat_aggregated_class3$rile_squared <- dat_aggregated_class3$rile^2


for (i in countrynames) {
    
    # subsetted data frame that excludes one country
    dat_filtered <- filter(dat_aggregated_class3,
                           countryname != i)
    
    # run Zelig model
    lm_zelig_subset <- zelig(sent_liwc_crabtreeetal ~ 
                                 incumbency_status2_factor * class +
                                 countryname +
                                 rile + rile_squared + extremist_party +
                                 year + gdp_growth_lag,
                             model = "ls",
                             data = dat_filtered)
    
    
    # set explanatory variable values
    x_past_inc <- setx(lm_zelig_subset, incumbency_status2_factor = "Incumbent", 
                       class = "Past")
    x_past_opp <- setx(lm_zelig_subset, incumbency_status2_factor = "Opposition", 
                       class = "Past")
    x_present_inc <- setx(lm_zelig_subset, incumbency_status2_factor = "Incumbent", 
                          class = "Present")
    x_present_opp <- setx(lm_zelig_subset, incumbency_status2_factor = "Opposition", 
                          class = "Present")
    x_future_inc <- setx(lm_zelig_subset, incumbency_status2_factor = "Incumbent", 
                         class = "Future")
    x_future_opp <- setx(lm_zelig_subset, incumbency_status2_factor = "Opposition", 
                         class = "Future")
    
    # simulations
    set.seed(34)
    lm_zelig_sim_past <- sim(lm_zelig_subset, 
                             x = x_past_opp, 
                             x1 = x_past_inc, 
                             num = 1000)
    
    set.seed(34)
    lm_zelig_sim_present <- sim(lm_zelig_subset, 
                                x = x_present_opp, 
                                x1 = x_present_inc, num = 1000)
    
    set.seed(34)
    lm_zelig_sim_future <- sim(lm_zelig_subset, 
                               x = x_future_opp, 
                               x1 = x_future_inc, num = 1000)
    
    
    # get first difference values
    fd_present <- unlist(lm_zelig_sim_present$sim.out[["x1"]][["fd"]])
    fd_future <- unlist(lm_zelig_sim_future$sim.out[["x1"]][["fd"]])
    fd_past <- unlist(lm_zelig_sim_past$sim.out[["x1"]][["fd"]])
    
    fd_future_dataframe <- data.frame(fd = fd_future,
                                      class = "Future")
    fd_past_dataframe <- data.frame(fd = fd_past,
                                    class = "Past")
    fd_present_dataframe <- data.frame(fd = fd_present,
                                       class = "Present")
    
    
    fd_dataframe_merged <- bind_rows(fd_present_dataframe,
                                     fd_past_dataframe,
                                     fd_future_dataframe) %>% 
        mutate(model = paste(i, "excluded"))
    
    
    dat_fd_all_jackknife <- bind_rows(fd_dataframe_merged,
                                      dat_fd_all_jackknife)
    
    # summarise the simulations
    dat_fd_sum_future <- data.frame(
        mean = mean(fd_future),
        sd = sd(fd_future),
        ci_95_low  = quantile(fd_future, .025),
        ci_95_high = quantile(fd_future, 0.975),
        ci_90_low  = quantile(fd_future, .05),
        ci_90_high = quantile(fd_future, 0.95),
        class = "Future")
    
    dat_fd_sum_past <- data.frame(
        mean = mean(fd_past),
        sd = sd(fd_past),
        ci_95_low  = quantile(fd_past, .025),
        ci_95_high = quantile(fd_past, 0.975),
        ci_90_low  = quantile(fd_past, .05),
        ci_90_high = quantile(fd_past, 0.95),
        class = "Past")
    
    
    dat_fd_sum_present <- data.frame(
        mean = mean(fd_present),
        sd = sd(fd_present),
        ci_95_low  = quantile(fd_present, .025),
        ci_95_high = quantile(fd_present, 0.975),
        ci_90_low  = quantile(fd_present, .05),
        ci_90_high = quantile(fd_present, 0.95),
        class = "Present")
    
    # combine the summarised first differences
    dat_fd_sum_merged <- bind_rows(dat_fd_sum_present,
                                   dat_fd_sum_past,
                                   dat_fd_sum_future) %>% 
        mutate(model = paste(i, "excluded"))
    
    
    # save the model-specific first-differences
    dat_fd_sum_jackknife <- bind_rows(dat_fd_sum_merged,
                                      dat_fd_sum_jackknife)
}


dat_fd_all_jackknife$class <- factor(dat_fd_all_jackknife$class,
                                     levels = c("Future", "Present", "Past"))

# Figure A23 ----
ggplot(dat_fd_all_jackknife, aes(x = fd)) + 
    geom_density(aes(fill = class), 
                 alpha = 0.4, 
                 colour = NA) + 
    geom_rug(aes(colour = class),
             alpha = 0.3) +
    facet_wrap(~model) +
    geom_vline(xintercept = 0, linetype = "dashed", colour = "red") + 
    labs(x = "First differences in sentiment between incumbents and opposition parties", 
         y = NULL) +
    scale_colour_manual(values = c("grey10",
                                   "#E8940E",
                                   "#3BACE5")) +
    scale_fill_manual(values = c("grey10",
                                 "#E8940E",
                                 "#3BACE5")) +
    theme(axis.text.y = element_blank(),
          axis.ticks.y = element_blank(),
          legend.title = element_blank())
ggsave("fga23.pdf", 
       width = 10, height = 6)


dat_fd_sum_jackknife$class <- factor(dat_fd_sum_jackknife$class,
                                     levels = c("Future", "Present", "Past"))


# Figure A24 ----
ggplot(dat_fd_sum_jackknife, aes(x = class, 
                                 y = mean,
                                 colour = class)) +
    geom_point(size = 3) +
    geom_linerange(aes(ymin = ci_95_low,
                       ymax = ci_95_high),
                   size = 0.6) +
    geom_linerange(aes(ymin = ci_90_low,
                       ymax = ci_90_high), 
                   size = 1.3) +
    facet_wrap(~model) +
    geom_hline(yintercept = 0, linetype = "dashed", colour = "red") +
    coord_flip() +
    scale_colour_manual(values = c("grey10",
                                   "#E8940E",
                                   "#3BACE5")) +
    labs(x = NULL,
         y = "First differences in sentiment between incumbents and opposition parties") +
    theme(legend.position = "none")
ggsave("fga24.pdf", 
       width = 10, height = 5)


# repeat but this time use different dictionaries and aggregations 
# as dependent variables and simulate first differences

dat_fd_sum_dictionaries <- data.frame()
fd_dataframe_merged_dictionaries <- data.frame()

unique_dicts <- 
    c("sent_liwc_crabtreeetal",
      "sent_liwc_log", 
      "sent_lsd_log",
      "sent_lsd_crabtreeetal")

for (i in unique_dicts) {
    
    # run Zelig model
    lm_zelig_dictionary <- zelig(
        paste(i[[1]], "~incumbency_status2_factor * class +
                             countryname +
                             rile + rile_squared + extremist_party +
                             year + gdp_growth_lag"),
        model = "ls",
        data = dat_aggregated_class3)
    
    
    # set explanatory variable values
    x_past_inc <- setx(lm_zelig_dictionary, incumbency_status2_factor = "Incumbent", 
                       class = "Past")
    x_past_opp <- setx(lm_zelig_dictionary, incumbency_status2_factor = "Opposition", 
                       class = "Past")
    x_present_inc <- setx(lm_zelig_dictionary, incumbency_status2_factor = "Incumbent", 
                          class = "Present")
    x_present_opp <- setx(lm_zelig_dictionary, incumbency_status2_factor = "Opposition", 
                          class = "Present")
    x_future_inc <- setx(lm_zelig_dictionary, incumbency_status2_factor = "Incumbent", 
                         class = "Future")
    x_future_opp <- setx(lm_zelig_dictionary, incumbency_status2_factor = "Opposition", 
                         class = "Future")
    
    # simulations
    set.seed(34)
    lm_zelig_sim_past <- sim(lm_zelig_dictionary, 
                             x = x_past_opp, 
                             x1 = x_past_inc, 
                             num = 1000)
    
    set.seed(34)
    lm_zelig_sim_present <- sim(lm_zelig_dictionary, 
                                x = x_present_opp, 
                                x1 = x_present_inc, num = 1000)
    
    set.seed(34)
    lm_zelig_sim_future <- sim(lm_zelig_dictionary, 
                               x = x_future_opp, 
                               x1 = x_future_inc, num = 1000)
    
    
    # get first difference values
    fd_present <- unlist(lm_zelig_sim_present$sim.out[["x1"]][["fd"]])
    fd_future <- unlist(lm_zelig_sim_future$sim.out[["x1"]][["fd"]])
    fd_past <- unlist(lm_zelig_sim_past$sim.out[["x1"]][["fd"]])
    
    fd_future_dataframe <- data.frame(fd = fd_future,
                                      class = "Future")
    fd_past_dataframe <- data.frame(fd = fd_past,
                                    class = "Past")
    fd_present_dataframe <- data.frame(fd = fd_present,
                                       class = "Present")
    
    
    fd_dataframe_merged <- bind_rows(fd_present_dataframe,
                                     fd_past_dataframe,
                                     fd_future_dataframe) %>% 
        mutate(model = i)
    
    
    fd_dataframe_merged_dictionaries <- bind_rows(fd_dataframe_merged,
                                                  fd_dataframe_merged_dictionaries)
    
    # summarise the simulations
    dat_fd_sum_future <- data.frame(
        mean = mean(fd_future),
        sd = sd(fd_future),
        ci_95_low  = quantile(fd_future, .025),
        ci_95_high = quantile(fd_future, 0.975),
        ci_90_low  = quantile(fd_future, .05),
        ci_90_high = quantile(fd_future, 0.95),
        class = "Future")
    
    dat_fd_sum_past <- data.frame(
        mean = mean(fd_past),
        sd = sd(fd_past),
        ci_95_low  = quantile(fd_past, .025),
        ci_95_high = quantile(fd_past, 0.975),
        ci_90_low  = quantile(fd_past, .05),
        ci_90_high = quantile(fd_past, 0.95),
        class = "Past")
    
    
    dat_fd_sum_present <- data.frame(
        mean = mean(fd_present),
        sd = sd(fd_present),
        ci_95_low  = quantile(fd_present, .025),
        ci_95_high = quantile(fd_present, 0.975),
        ci_90_low  = quantile(fd_present, .05),
        ci_90_high = quantile(fd_present, 0.95),
        class = "Present")
    
    # combine the summarised first differences
    dat_fd_sum_merged <- bind_rows(dat_fd_sum_present,
                                   dat_fd_sum_past,
                                   dat_fd_sum_future) %>% 
        mutate(model = i)
    
    
    # save the model-specific first-differences
    dat_fd_sum_dictionaries <- bind_rows(dat_fd_sum_merged,
                                         dat_fd_sum_dictionaries)
}


# plot first difference coefficients
dat_fd_sum_dictionaries$class <- factor(dat_fd_sum_dictionaries$class,
                                        levels = c("Future", "Present", "Past"))

fd_dataframe_merged_dictionaries$class <- factor(fd_dataframe_merged_dictionaries$class,
                                                 levels = c("Future", "Present", "Past"))


# give models nicer names
recode_dictionaries <- 
    c("'sent_lsd_crabtreeetal'='Lexicoder Sentiment Dictionary\\n(Crabtree et al. 2020)';
    'sent_lsd_log'='Lexicoder Sentiment Dictionary\\n(Proksch et al. 2019)';
    'sent_liwc_crabtreeetal'='LIWC\\n(Crabtree et al. 2020)';
    'sent_liwc_log'='LIWC\\n(Proksch et al. 2019)'")

fd_dataframe_merged_dictionaries <- fd_dataframe_merged_dictionaries %>% 
    mutate(model_recoded = car::recode(model, recode_dictionaries))

# Figure A25 ----
ggplot(fd_dataframe_merged_dictionaries, aes(x = fd)) + 
    geom_density(aes(fill = class), 
                 alpha = 0.4, 
                 colour = NA) + 
    geom_rug(aes(colour = class),
             alpha = 0.3) +
    facet_wrap(~model_recoded, scales = "free") +
    geom_vline(xintercept = 0, linetype = "dashed", colour = "red") + 
    labs(x = "First differences in sentiment between incumbents and opposition parties", 
         y = NULL) +
    scale_colour_manual(values = c("grey10",
                                   "#E8940E",
                                   "#3BACE5")) +
    scale_fill_manual(values = c("grey10",
                                 "#E8940E",
                                 "#3BACE5")) +
    theme(axis.text.y = element_blank(),
          axis.ticks.y = element_blank(),
          legend.title = element_blank())
ggsave("fga25.pdf", 
       width = 10, height = 6)


dat_fd_sum_dictionaries <- dat_fd_sum_dictionaries %>% 
    mutate(model_recoded = car::recode(model, recode_dictionaries))

# Figure A26 ----
ggplot(dat_fd_sum_dictionaries, aes(x = class, 
                                    y = mean,
                                    colour = class)) +
    geom_point(size = 3) +
    geom_linerange(aes(ymin = ci_95_low,
                       ymax = ci_95_high),
                   size = 0.6) +
    geom_linerange(aes(ymin = ci_90_low,
                       ymax = ci_90_high), 
                   size = 1.3) +
    facet_wrap(~model_recoded, scales = "free_x") +
    geom_hline(yintercept = 0, linetype = "dashed", colour = "red") +
    coord_flip() +
    scale_colour_manual(values = c("grey10",
                                   "#E8940E",
                                   "#3BACE5")) +
    labs(x = NULL,
         y = "First differences in sentiment between incumbents and opposition parties") +
    theme(legend.position = "none")
ggsave("fga26.pdf", 
       width = 10, height = 4)



# run Zelig model for first difference simulation (displayed in main paper)

lm_zelig <- zelig(sent_liwc_crabtreeetal ~ 
                      incumbency_status2_factor * class +
                      countryname +
                      rile + rile_squared + extremist_party +
                      year + gdp_growth_lag,
                  model = "ls",
                  data = dat_aggregated_class3)


# Table A16 ----
screenreg(lm_zelig)

texreg(list(lm_zelig),
       ci.force = TRUE, 
       float.pos = "!h",
       label = "tab:reg_zelig",
       fontsize = "footnotesize",
       caption.above =  TRUE,
       longtable=FALSE,
       use.packages=FALSE,
       file="taba16.tex",
       caption = "Predicting sentiment in party manifestos (using the LIWC sentiment dictionary and the aggregation formula recommended by \\textcite{crabtree19}  as dependent variable)",
       custom.note = ("\\parbox{0.75\\linewidth}{\\footnotesize \\vspace{2pt}%stars. \\\\
                      \\textit{Note}: The model reported in this table is used to simulate first differences.
                      95\\% confidence intervals in parentheses.}"),
       custom.coef.names = c("(Intercept)",
                             "Incumbent", 
                             "Class: Past (ref.: Future)",
                             "Class: Present",
                             "Country: Austria (ref.: Australia)",
                             "Country: Canada",
                             "Country: Germany",
                             "Country: Ireland",
                             "Country: New Zealand",
                             "Country: Switzerland",
                             "Country: United Kingdom",
                             "Country: United States",
                             "RILE",
                             "RILE$^2$",
                             "Extremist party",
                             "Year",
                             "GDP growth",
                             "Incumbent $\\times$ Past",
                             "Incumbent $\\times$ Present"),
       custom.gof.names = c("R$^2$", "Adj. R$^2$", 
                            "Number of observations",
                            "RMSE"))


# new data frame for post hoc comparisons
dat_aggregated_class3_contrasts <- dat_aggregated_class3
dat_aggregated_class3_contrasts$incumbency_status2_factor <- relevel(dat_aggregated_class3_contrasts$incumbency_status2_factor,
                                                                     ref = "Incumbent")

lm_liwc_contrast <- lm_robust(sent_liwc_crabtreeetal ~ 
                                  incumbency_status2_factor * class + countryname +
                                  rile + rile_squared + extremist_party + year + gdp_growth_lag,
                              data = dat_aggregated_class3_contrasts,
                              cluster = manifesto_id)


# https://aosmith.rbind.io/2019/03/25/getting-started-with-emmeans/#confidence-intervals-for-comparisons
rg2 <- qdrg(object = lm_liwc_contrast, data = 
                dat_aggregated_class3_contrasts)

contrasts_class_inc <- emmeans(rg2, pairwise ~  incumbency_status2_factor |class)$contrasts %>% 
    broom::tidy() 

contrasts_class_inc$class <- factor(contrasts_class_inc$class,
                                    levels = c("Future", "Present", "Past"))

# Figure A27 ----
ggplot(contrasts_class_inc, aes(x = class,
                                y = estimate)) +
    geom_point(size = 3) +
    geom_linerange(aes(ymin = estimate - 1.96 * std.error,
                       ymax = estimate + 1.96 * std.error),
                   size = 0.6) +
    geom_linerange(aes(ymin = estimate - 1.645 * std.error,
                       ymax = estimate + 1.645 * std.error), 
                   size = 1.3) +
    scale_y_continuous(limits = c(-0.2, 1.5), breaks = c(seq(-0.2, 1.4, 0.2))) +
    geom_hline(yintercept = 0, linetype = "dashed", colour = "red") +
    coord_flip() +
    labs(x = NULL,
         y = "Pairwise post hoc comparisons")
ggsave("fga27.pdf", 
       width = 6, height = 2.5)


# rerun the model for each class separately to inspect the coefficient of incumbency

# only keep manifestos with at least one sentence on the past, present, and future
dat_aggregated_class3_full <- dat_aggregated_class3 %>% 
    group_by(manifesto_id) %>% 
    mutate(n_obs_man = n()) %>% 
    filter(n_obs_man == 3)

dat_aggregated_class3_full$class <- factor(dat_aggregated_class3_full$class,
                                      levels = c("Future", "Past", "Present"))


lm_sent_liwc_extended_gdp_past <- lm_robust(sent_liwc_crabtreeetal ~ 
                                                incumbency_status2_factor  +
                                                poly(rile, 2) + extremist_party + year,
                                            fixed_effects = countryname, clusters = party,
                                            data = filter(dat_aggregated_class3_full, class == "Past"))

lm_sent_liwc_extended_gdp_present <- lm_robust(sent_liwc_crabtreeetal ~ 
                                                   incumbency_status2_factor  +
                                                   poly(rile, 2) + extremist_party + year,
                                               fixed_effects = countryname, clusters = party,
                                               data = filter(dat_aggregated_class3_full, class == "Present"))

lm_sent_liwc_extended_gdp_future <- lm_robust(sent_liwc_crabtreeetal ~ 
                                                  incumbency_status2_factor +
                                                  poly(rile, 2) + extremist_party + year,
                                              fixed_effects = countryname, clusters = party,
                                              data = filter(dat_aggregated_class3_full, class == "Future"))


# Table A17 ----
screenreg(list(lm_sent_liwc_extended_gdp_past,
               lm_sent_liwc_extended_gdp_present,
               lm_sent_liwc_extended_gdp_future))


texreg(list(lm_sent_liwc_extended_gdp_past,
            lm_sent_liwc_extended_gdp_present,
            lm_sent_liwc_extended_gdp_future),
       custom.model.names = c("Model 1 (Past)", 
                              "Model 2 (Present)",
                              "Model 3 (Future)"),
       custom.coef.names = c("Incumbent",
                             "RILE", 
                             "RILE$^2$",
                             "Extremist party",  
                             "Year"),
       float.pos = "!h",
       label = "tab:reg_sent_liwc_main_separate_classes",
       fontsize = "footnotesize",
       caption.above =  TRUE,
       include.variance=FALSE,
       longtable=FALSE,
       use.packages=FALSE,
       custom.gof.names = c("R$^2$", "Adj. R$^2$",
                            "Number of observations",
                            "RMSE", "Number of clusters (Parties)"),
       file="taba17.tex",
       caption = "Predicting sentiment in party manifestos separately for each temporal perspective (using the LIWC sentiment dictionary and the aggregation formula recommended by \\textcite{crabtree19}  as dependent variable)",
       custom.note = ("\\parbox{0.85\\linewidth}{\\footnotesize \\vspace{2pt}%stars. \\\\
                      \\textit{Note}: 95\\% confidence intervals in parentheses. Model 1 filters the manifesto-observations of sentiment 
                      in sentences classified as past, Model 2 filters manifesto-observations of sentiment in sentences classified as present, 
                      Model 3 filters manifesto-observations of sentiment in sentences classified as future. 
                      All models are linear regressions with country-fixed effects and robust standard errors clustered by manifesto. 
                      Models only consider manifestos with at least one sentence devoted to the past, present, and future.}"))


# run the regression on the level of sentences and use the 
# probability of a sentence being classified as "Future" as an indicator for 
# the degree of retrospective/prospective communcation

# calculate sentiment for each sentence
dat_combined <- dat_combined %>% 
    group_by(manifesto_id, class) %>% 
    mutate(sentiment_sentence_liwc = 100 * ((positive_liwc - negative_liwc) / ntoken)) %>% 
    ungroup() 


# model without economic controls and sentence-level sentiment as the DV
lm_sent_sentence_future_base <- lmer(sentiment_sentence_liwc ~ 
                                         incumbency_status2_factor * Future + 
                                         poly(rile, 2) + 
                                         year + 
                                         (1 | countryname)  +  
                                         (1 | partyname) + 
                                         (1 | election_id) +
                                         (1 | manifesto_id),
                                     data = dat_combined)


# model with gdp growth and sentence-level sentiment as the DV
lm_sent_sentence_future <- lmer(sentiment_sentence_liwc ~ 
                                    incumbency_status2_factor * Future + 
                                    poly(rile, 2) + year +  gdp_growth_lag +
                                    (1 | countryname)  +  
                                    (1 | partyname) +
                                    (1 | election_id) +
                                    (1 | manifesto_id),
                                data = dat_combined)

# get expected values
pred_sent_inc2_sentence <- ggpredict(lm_sent_sentence_future, 
                                     terms = c("Future", 
                                               "incumbency_status2_factor"))

# Figure A28 ----
p1 <- ggplot(pred_sent_inc2_sentence, aes(x = x, 
                                          y = predicted,
                                          ymin = conf.low,
                                          ymax = conf.high,
                                          fill = group)) +
    geom_ribbon(aes(ymin = predicted - 1.645 * std.error,
                    ymax = predicted + 1.645 * std.error),
                alpha = 0.4) +
    geom_ribbon(alpha = 0.2) +
    geom_line(size = 0.8) +
    scale_fill_manual(values = c("red", "grey50"),
                      guide = guide_legend(reverse = TRUE)) +
    labs(x = NULL, y = "Sentiment") + 
    theme(legend.title = element_blank(),
          legend.position = c(0.8, 0.2),
          legend.background = element_rect(colour = 'grey60', fill = 'white', linetype='solid'),
          legend.text = element_text(size = 12))

p1

# plot distribution of independent variable
p2 <- ggplot(dat_combined, aes(x = Future, 
                               colour = incumbency_status2_factor,
                               fill = incumbency_status2_factor)) +
    geom_density(alpha = 0.2) +
    scale_fill_manual(values = c("red", "grey50")) +
    scale_colour_manual(values = c("red", "grey50")) +
    theme(legend.position = "none",
          axis.ticks.y = element_blank(),
          axis.text.y = element_blank()) +
    labs(x = "Probability of sentence being classified as 'Future'",
         y = "Density")

plot_grid(p1, p2, ncol = 1, align = 'v', rel_heights = c(2, 1))
ggsave("fga28.pdf",
       width = 5, height = 6)


# similar procedure, but use a gam line to check sure whether 
# the findings from the linear regression are not caused by the model specification

# Figure A29 ----
ggplot(dat_combined, aes(x = Future, y = sentiment_sentence_liwc,
                         fill = incumbency_status2_factor)) +
    geom_smooth(alpha = 0.3, colour = "black") +
    scale_fill_manual(values = c("red", "grey50"),
                      guide = guide_legend(reverse = TRUE)) +
    labs(x = "Probability of sentence classified as 'Future'", 
         y = "Sentiment") + 
    theme(legend.title = element_blank(),
          legend.position = c(0.8, 0.2),
          legend.background = element_rect(colour = 'grey60', fill = 'white', linetype='solid'),
          legend.text = element_text(size = 12))
ggsave("fga29.pdf",
       width = 5, height = 4)



# Table A18 ----
screenreg(list(lm_sent_sentence_future_base,
               lm_sent_sentence_future))


texreg(list(lm_sent_sentence_future_base, 
            lm_sent_sentence_future),
       ci.force = TRUE, 
       omit.coef = c("(Intercept)"),
       float.pos = "!h",
       label = "tab:reg_sent_sentences",
       fontsize = "footnotesize",
       caption.above =  TRUE,
       longtable=FALSE,
       use.packages=FALSE,
       file="taba18.tex",
       caption = "Predicting sentiment in party manifestos on the level of sentences (using the LIWC sentiment dictionary and the aggregation formula recommended by \\textcite{crabtree19}  as dependent variable)",
       custom.note = ("\\parbox{0.75\\linewidth}{\\footnotesize \\vspace{2pt}%stars. \\\\
                      \\textit{Note}: Models are linear multilevel regressions with  country-, election-, party-, and manifesto-varying intercepts.
                      95\\% confidence intervals in parentheses.}"),
       custom.coef.names = c("Incumbent", 
                             "Prob. 'Future'",
                             "RILE",
                             "RILE$^2$",
                             "Year",
                             "Incumbent $\\times$ Prob. 'Future'",
                             "GDP growth"),
       custom.gof.names = c("AIC", "BIC", 
                            "Log Likelihood", "Num. obs.",
                            "Num. groups: Manifesto",
                            "Num. groups: Election",
                            "Num. groups: Party",
                            "Num. groups: Country",
                            "Variance: Manifesto",
                            "Variance: Election",
                            "Variance: Party",
                            "Variance: Country",
                            "Variance: Residual"))


# calculate difference in manifesto-level sentiment in statements on the past and future

# transform data frame to "wide" format
dat_aggregated_class3_wide <- dat_aggregated_class3 %>% 
    select(manifesto_id, class, sent_liwc_crabtreeetal) %>% 
    spread(class, sent_liwc_crabtreeetal) %>% 
    mutate(sent_liwc_crabtreeetal_past_minus_future = Past - Future)

# join back to the full dataset but only select one observation per manifesto
dat_aggregated_class3_sent_alt <- left_join(dat_aggregated_class3, dat_aggregated_class3_wide,
                                            by = "manifesto_id") %>% 
    filter(class == "Past")


# run regression model
lm_sent_liwc_past_minus_future <- lm_robust(sent_liwc_crabtreeetal_past_minus_future ~ 
                                                countryname +
                                                poly(rile, 2) + extremist_party + year + gdp_growth_lag + 
                                                incumbency_status2_factor,
                                            data = dat_aggregated_class3_sent_alt,
                                            cluster = election_id)

# run model with finer measure of incumbency status
lm_sent_liwc_past_minus_future_inc4 <- lm_robust(sent_liwc_crabtreeetal_past_minus_future ~ 
                                                     incumbency_status4_factor + 
                                                     countryname +
                                                     poly(rile, 2) + extremist_party + year + gdp_growth_lag,
                                                 data = dat_aggregated_class3_sent_alt,
                                                 cluster = election_id)

sd(dat_aggregated_class3_sent_alt$sent_liwc_crabtreeetal_past_minus_future)


# Table A19 ----
screenreg(list(lm_sent_liwc_past_minus_future, 
               lm_sent_liwc_past_minus_future_inc4))


texreg(list(lm_sent_liwc_past_minus_future, 
            lm_sent_liwc_past_minus_future_inc4),
       omit.coef = c("country*"),
       custom.coef.names = c("(Intercept)",
                             "RILE", 
                             "RILE$^2$",
                             "Extremist party",
                             "Year",
                             "GDP growth",
                             "Inc. (2 cat.): Incumbent (ref.: Opposition)",
                             "Inc. (4 cat.): Opposition (ref.: Not in Previous Parliament)",
                             "Inc. (4 cat.): Non-PM Incumbent",
                             "Inc. (4 cat.): PM Incumbent"
       ),
       float.pos = "!h",
       label = "tab:sent_past_minus_future",
       fontsize = "footnotesize",
       caption.above =  TRUE,
       include.variance=FALSE,
       longtable=FALSE,
       use.packages=FALSE,
       custom.gof.names = c("R$^2$", "Adj. R$^2$",
                            "Number of observations",
                            "RMSE", "Number of clusters (Elections)"),
       file="taba19.tex",
       caption = "Predicting the difference in sentiment between statements on the past and on the future in each party manifesto (using the LIWC sentiment dictionary and the aggregation formula recommended by \\textcite{crabtree19}  as dependent variable)",
       custom.note = ("\\parbox{0.98\\linewidth}{\\footnotesize \\vspace{2pt}%stars. \\\\
                      \\textit{Note}: 95\\% confidence intervals in parentheses. Model 1 uses a binary classification of incumbency,
                       Model 2 applies a more detailed classification. All models are
                      linear regressions with robust standard errors clustered by manifesto. 
                      The models include country dummies which are omitted from the table.}"),)



pred_sent_past_minus_future <- ggpredict(lm_sent_liwc_past_minus_future_inc4, 
                                         terms = c("incumbency_status4_factor"))

pred_sent_past_minus_future <- pred_sent_past_minus_future %>% 
    mutate(x = car::recode(x, 
                           "'1'='Not in Previous Parliament';
                              '2'='Opposition'; '3'='Non-PM Incumbent';
                              '4'='PM Incumbent'"))

pred_sent_past_minus_future$x <- factor(pred_sent_past_minus_future$x,
                                        levels = c("Not in Previous Parliament",
                                                   "Opposition", 
                                                   "Non-PM Incumbent",
                                                   "PM Incumbent"))

# Figure A30 ----
ggplot(pred_sent_past_minus_future, aes(x = x, 
                                        y = predicted,
                                        ymin = conf.low,
                                        ymax = conf.high, 
                                        colour = x,
                                        shape = x)) +
    geom_point(position = position_dodge(width = 0.35), size = 3.5) +
    geom_linerange(position = position_dodge(width = 0.35), size = 0.6) +
    geom_linerange(aes(ymin = predicted - 1.645 * std.error,
                       ymax = predicted + 1.645 * std.error), 
                   position = position_dodge(width = 0.35), 
                   size = 1.3) +
    scale_shape_manual(values = c(2, 17, 1, 16)) +
    scale_y_continuous(limits = c(-4, 1)) +
    geom_hline(yintercept = 0, linetype = "dashed", colour = "grey50") +
    scale_colour_manual(values = c("red", "red", "black", "black")) +
    labs(x = NULL, y = "Difference in Sentiment (past) and Sentiment (future)") +
    theme(legend.position = "none")
ggsave("fga30.pdf",
       width = 10, height = 5)



# check most frequent positive and negative terms

# load translated LSD dictionary
data_dictionary_lsdgerman <- readRDS("data_dictionary_lsdgerman.rds")

# create text corpus and tokenize corpus
corp <- corpus(dat_combined)
toks <- tokens(corp)

# select now many words to retrieve for each category
n_words <- 20

# get most frequent English positive terms by class
dat_en_pos <- toks %>% 
    tokens_subset(language == "english") %>% 
    tokens_remove(pattern = "ireland*") %>% 
    tokens_keep(pattern = data_dictionary_LSD2015$positive) %>% 
    dfm() %>% 
    textstat_frequency(n = n_words, groups = "class") %>% 
    mutate(type = "Positive",
           language = "English")

# get most frequent English negative terms by class
dat_en_neg <- toks %>% 
    tokens_subset(language == "english") %>% 
    tokens_remove(pattern = "ireland*") %>% 
    tokens_keep(pattern = data_dictionary_LSD2015$negative) %>% 
    dfm() %>% 
    textstat_frequency(n = n_words, groups = "class") %>% 
    mutate(type = "Negative",
           language = "English")


# get most frequent German positive terms by class
dat_de_posemo <- toks %>% 
    tokens_subset(language == "german") %>% 
    tokens_keep(pattern = data_dictionary_lsdgerman$pos) %>% 
    dfm() %>% 
    dfm_group(groups = "class") %>% 
    textstat_frequency(n = n_words, groups = "class") %>% 
    mutate(type = "Positive",
           language = "English")

# get most frequent German negative terms by class
dat_de_negemo <- toks %>% 
    tokens_subset(language == "german") %>% 
    tokens_keep(pattern = data_dictionary_lsdgerman$neg) %>% 
    dfm() %>% 
    textstat_frequency(n = n_words, groups = "class") %>% 
    mutate(type = "Negative",
           language = "English")


# bind data frames
dat_en_pos_neg <- bind_rows(dat_en_pos,
                            dat_en_neg)

dat_en_pos_neg$group <- factor(dat_en_pos_neg$group,
                               levels = c("Past", "Present", "Future"))

dat_de_pos_neg <- bind_rows(dat_de_posemo,
                            dat_de_negemo)

dat_de_pos_neg$group <- factor(dat_de_pos_neg$group,
                               levels = c("Past", "Present", "Future"))

# Figure A31 ----
ggplot(data = dat_en_pos_neg, 
       aes(x = factor(nrow(dat_en_pos_neg):1), y = frequency)) +
    geom_bar(width = 0.05, stat = "identity") +
    geom_point(size = 2) +
    facet_wrap(type~group, scales = "free", nrow = 2) +
    coord_flip() +
    scale_x_discrete(breaks = nrow(dat_en_pos_neg):1,
                     labels = dat_en_pos_neg$feature) +
    labs(x = NULL, y = "Frequency") +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, size = 10))
ggsave("fga31.pdf", width = 10, height = 9)


# Figure A32 ----
ggplot(data = dat_de_pos_neg, 
       aes(x = factor(nrow(dat_de_pos_neg):1), y = frequency)) +
    geom_bar(width = 0.05, stat = "identity") +
    geom_point(size = 2) +
    facet_wrap(type~group, scales = "free", nrow = 2) +
    coord_flip() +
    scale_x_discrete(breaks = nrow(dat_de_pos_neg):1,
                     labels = dat_de_pos_neg$feature) +
    labs(x = NULL, y = "Frequency") +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, size = 10))
ggsave("fga32.pdf", width = 10, height = 9)

